##### restructing SEP wace 8 data for conjoint analysis ######


rm(list = ls())


# Load Packages 
library(tidyverse)
library(haven)
library(labelled)
library(data.table)
library(stringr)
library(cregg)

# Load data
w8 <- read_dta("/Volumes/1709-UmweltPanel/data/w8/final_data/upanel_w8.dta") %>% 
  zap_formats() %>% 
  zap_label %>% 
  as.data.frame()

# Save conjoint data
df_all <- w8 %>% 
  # subset to conjoint participants (this also filters unit non-response)
  filter(w8_surveyarm == 1) %>% 
  # select conjoint variables
  select(PubId, contains("q32"), contains("conj")) %>% 
  # get rid of variables indicating attribute order of appearance
  select(-contains(c("r1attr", "r2attr", "r3attr", "r4attr"))) 



# data prep ---------------------------------------------------------------

# Drop observations with any missings
df <- df_all %>% 
  filter_all(all_vars(. >= 0))

# Examine missings
dropped <- anti_join(df_all, df)

# drop product randomisation from main df ---------------------------------
df_final <- df %>% 
  select(-w8_treat_conj_buy)

# Save coding scheme of attribute variables (one for each)
labels <- df %>% 
  select(contains("w8_conjoint_buy_r"))

conj_attr1 <- data.frame(
  code = as.numeric(val_labels(labels$w8_conjoint_buy_r1prod1price)),
  attr = names(val_labels(labels$w8_conjoint_buy_r1prod1price)))

conj_attr2 <- data.frame(
  code = as.numeric(val_labels(labels$w8_conjoint_buy_r1prod1life)),
  attr = names(val_labels(labels$w8_conjoint_buy_r1prod1life)))

conj_attr3 <- data.frame(
  code = as.numeric(val_labels(labels$w8_conjoint_buy_r1prod1env)),
  attr = names(val_labels(labels$w8_conjoint_buy_r1prod1env)))

conj_attr4 <- data.frame(
  code = as.numeric(val_labels(labels$w8_conjoint_buy_r1prod1eff)),
  attr = names(val_labels(labels$w8_conjoint_buy_r1prod1eff)))

conj_attr5 <- data.frame(
  code = as.numeric(val_labels(labels$w8_conjoint_buy_r1prod1recy)),
  attr = names(val_labels(labels$w8_conjoint_buy_r1prod1recy)))

conj_attr6 <- data.frame(
  code = as.numeric(val_labels(labels$w8_conjoint_buy_r1prod1rep)),
  attr = names(val_labels(labels$w8_conjoint_buy_r1prod1rep)))


# data wrangling ----------------------------------------------------------

dfconj <- df_final


### Stretch dataframe
dfconj_long <- dfconj %>% 
  pivot_longer(-PubId, names_to = "variable", values_to = "value")

dfconj_long <- dfconj_long %>% 
  mutate(value = as.numeric(value))

### Add policy, round, variable type indicators 

# Generate product number variable
dfconj_long <- dfconj_long %>% 
  mutate(product = as.numeric(str_sub(str_extract(variable, "d\\d"), 2)))

# Generate experimental round variable
dfconj_long <- dfconj_long %>% 
  mutate(round = as.numeric(str_sub(str_extract(variable, "r\\d"), 2)))

# Generate product attribute variables
dfconj_long <- dfconj_long %>% 
  mutate(vars = paste0(str_sub(str_extract(variable, "(?<=\\d)[a-z]+$"), 1)))

# Generate choice variable
dfconj_long <- dfconj_long %>% 
  mutate(vars = ifelse(str_detect(variable, "q32x1"), "choice1", 
                       ifelse(str_detect(variable, "q32x2"), "choice2", 
                              ifelse(str_detect(variable, "q32x3"), "choice3",
                                     ifelse(str_detect(variable, "q32x4"), "choice4",
                                     vars)))))

# Fill in missing round variable
dfconj_long <- dfconj_long %>% 
  mutate(round = ifelse(str_detect(vars, "choice1"), 1, 
                       ifelse(str_detect(vars, "choice2"), 2, 
                              ifelse(str_detect(vars, "choice3"), 3,
                                     ifelse(str_detect(vars, "choice4"), 4,
                                            round)))))

# Rename choice variable

dfconj_long <- dfconj_long %>%
  mutate(vars = ifelse(str_detect(vars, "choice1|choice2|choice3|choice4"), "choice", vars))

# Remove original variable names 
dfconj_long <- dfconj_long %>% select(-variable)

# Widen data to observations per Id per round
dfconj <- dfconj_long %>%
  pivot_wider(names_from = vars, values_from = value)

### Bring attribute, choice and rate in one row per respondent-round-package

# Add Id-Round Indicator 
dfconj <- dfconj %>% mutate(idround = paste0(PubId, "-", round))

# Replace NAs for choice in rows with attribute info
dfconj <- dfconj %>% 
  group_by(idround) %>% 
  mutate(choice = replace_na(choice[!is.na(choice)])) %>% 
  ungroup()

# Transform choice to binary indicator (package chosen or not)
dfconj = dfconj %>% 
  mutate(choice = ifelse(product == choice, 1, 0))

# Filter rows and select variables
dfconj <- dfconj %>% 
  # Keep one row per respondent-round-package
  filter(!is.na(product)) %>% 
  # Drop duplicated variables 
  select(-idround)


# quick check -------------------------------------------------------------

# Manually type number of conjoint rounds and products per round
nrounds = 4
nproduct = 2

# Correct number of rows?
nrow(df_final)*nrounds*nproduct == nrow(dfconj)

# For every policy a respondent chose, the respondent did not choose another
sum(dfconj$choice == 0) == sum(dfconj$choice == 1) 


# assign attribute names --------------------------------------------------

dfconj = dfconj %>% 
  mutate(price = 
           factor(price, levels = conj_attr1$code, labels = conj_attr1$attr),
         life = 
           factor(life, levels = conj_attr2$code, labels = conj_attr2$attr),
         env = 
           factor(env, levels = conj_attr3$code, labels = conj_attr3$attr),
         eff = 
           factor(eff, levels = conj_attr4$code, labels = conj_attr4$attr),
         recy = 
           factor(recy, levels = conj_attr5$code, labels = conj_attr5$attr),
         rep = 
           factor(rep, levels = conj_attr6$code, labels = conj_attr6$attr))

# Drop unused levels
dfconj = droplevels(dfconj)

# Rename attribute levels
dfconj <- dfconj %>%
  mutate(price = recode(price, 'Preis: {[180 CHF] (Smartphone) // [250 CHF] (Fernseher) // [470 CHF] (Waschmaschine)}' = 
                        '180 / 250 / 470 CHF', 
                        'Preis: {[550 CHF] (Smartphone) // [850 CHF] (Fernseher) // [890 CHF] (Waschmaschine)}' =
                          '550 / 850 / 890 CHF',
                        'Preis: {[920 CHF] (Smartphone) // [1900 CHF] (Fernseher) // [1650 CHF] (Waschmaschine)}' =
                          '920 / 1900 / 1650 CHF',
                        'Preis: {[1300 CHF] (Smartphone) // [3100 CHF] (Fernseher) // [3200 CHF] (Waschmaschine)}' =
                          '1300 / 3100 / 3200 CHF')) %>% 
  mutate(life = recode(life, 'Funktionsdauer ca. {[2] (Smartphone) // [2] (Fernseher) // [5] (Waschmaschine)} Jahre' =
                         '2 / 2 / 5 years',
                       'Funktionsdauer ca. {[4] (Smartphone) // [8] (Fernseher) // [10] (Waschmaschine)} Jahre' =
                         '4 / 8 / 10 years',
                       'Funktionsdauer ca. {[8] (Smartphone) // [15] (Fernseher) // [15] (Waschmaschine)} Jahre' =
                         '8 / 15 / 15 years')) %>% 
  mutate(env = recode(env, 'Sehr niedrige Umweltbelastung' = 'Very low',
                      'Mittlere Umweltbelastung' = 'Medium', 
                      'Sehr hohe Umweltbelastung' = 'Very high')) %>% 
  mutate(eff = recode(eff, 'Energieeffizienzklasse G' = 'G',
                      'Energieeffizienzklasse E' = 'E', 
                      ' Energieeffizienzklasse C' = 'C',
                      'Energieeffizienzklasse A' = 'A')) %>% 
  mutate(recy = recode(recy, 'Nicht rezyklierbar' = 'Not recyclable',
                       'Beschränkt rezyklierbar' = 'Limited recyclable',
                       'Gut rezyklierbar' = 'Recyclable')) %>% 
  mutate(rep = recode(rep, 'Nicht reparierbar' = 'Not repairable at all',
                      'Gut reparierbar' = 'Repairable',
                      'Beschränkt reparierbar' = 'Repairable to a limited degree')) %>% 
  rename(Price = price, 'Functional duration' = life, 'Env. production impact' = env, 
         'Energy efficiency' = eff, Recyclability = recy, Repairability = rep)
                
# Generate variable indicating which randomized product was displayed

df2 <- df %>% 
  select(PubId, w8_treat_conj_buy)

dfconj <- merge(dfconj, df2, by = "PubId", all = TRUE)

dfconj <- dfconj %>% 
  mutate(w8_treat_conj_buy = recode(as.factor(w8_treat_conj_buy), '1' = 'Smartphone',
                                    '2' = 'TV',
                                    '3' = 'Washing machine')) %>% 
  rename(product_type = w8_treat_conj_buy)

# save conjoint dataframe 
write.csv(dfconj, file = "/Users/cbrugge/Desktop/CE Paper/w8_conjoint.csv")
save(dfconj, file = "/Users/cbrugge/Desktop/CE Paper/w8_conjoint.RData")

# subgroups data w8

w8_subgroups <- read_dta("/Volumes/1709-UmweltPanel/data/w8/final_data/upanel_w8.dta") %>% 
  zap_formats() %>% 
  zap_label() %>% 
  as.data.frame()

# get relevant variables

w8_subgroups <- w8_subgroups %>% 
  select(PubId, w8_q1, w8_q2, w8_q8x1, w8_q8x3, w8_q9x1, w8_q9x2, w8_q9x3, w8_q9x4, 
         w8_q10x1, w8_q10x2, w8_q10x3, w8_q10x4, w8_q11x1, w8_q11x2, w8_q13x1, 
         w8_q13x2, w8_q13x3, w8_q13x4, w8_q14x1, w8_q14x5, w8_q34, w8_q35, w8_q36,
         w8_q37)


# merge with w8 data

w8_subgroups <- merge(dfconj, w8_subgroups, by = "PubId", all = TRUE) %>%
  filter(!is.na(round))

# subgroups data w7

w7_subgroups <- read_dta("/Volumes/1709-UmweltPanel/data/w7/final_data/upanel_w7.dta") %>% 
  zap_formats() %>% 
  zap_label() %>% 
  as.data.frame()

# get relevant variables

w7_subgroups <- w7_subgroups %>% 
  select(PubId, w7_q5, w7_q40x1, w7_q10x1, w7_q10x2, w7_q10x3, w7_q10x4, w7_q10x5, w7_q10x6,
         w7_q10x7, w7_q10x8, w7_q10x9, w7_q23x1, w7_q26)


# merge with w8 data

w7_w8_subgroups <- merge(w8_subgroups, w7_subgroups, by = "PubId", all = TRUE)%>%
  filter(!is.na(round))


# Replace all values below 0 with NA
w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate_if(is.numeric, ~ ifelse(. < 0, NA, .))


# environmental attitudes: create median split variable 

w7_w8_subgroups <- w7_w8_subgroups %>% 
  #  filter(w7_q10x6 >= 0) %>% 
  mutate(w7_q10x6 = recode(as.numeric(w7_q10x6), # invert scale for q10x6
                           '1' = '5',
                           '2' = '4',
                           '3' = '3',
                           '4' = '2', 
                           '5' = '1')) 

w7_w8_subgroups$w7_q10x1 <- as.numeric(w7_w8_subgroups$w7_q10x1)
w7_w8_subgroups$w7_q10x2 <- as.numeric(w7_w8_subgroups$w7_q10x2)
w7_w8_subgroups$w7_q10x3 <- as.numeric(w7_w8_subgroups$w7_q10x3)
w7_w8_subgroups$w7_q10x4 <- as.numeric(w7_w8_subgroups$w7_q10x4)
w7_w8_subgroups$w7_q10x5 <- as.numeric(w7_w8_subgroups$w7_q10x5)
w7_w8_subgroups$w7_q10x6 <- as.numeric(w7_w8_subgroups$w7_q10x6)
w7_w8_subgroups$w7_q10x7 <- as.numeric(w7_w8_subgroups$w7_q10x7)
w7_w8_subgroups$w7_q10x8 <- as.numeric(w7_w8_subgroups$w7_q10x8)
w7_w8_subgroups$w7_q10x9 <- as.numeric(w7_w8_subgroups$w7_q10x9)

# generate mean variable across all individual items


w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(env_attitudes_sum = rowSums(across(w7_q10x1:w7_q10x9)))

w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(env_attitudes_mean = rowMeans(across(w7_q10x1:w7_q10x9))) 

w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(env_attitudes_dich = ifelse(env_attitudes_mean >= median(env_attitudes_mean, na.rm = TRUE),
                                     "pro-environment", "anti-environment"))


# political ideology

w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(political_ideology = cut(w7_q26, breaks = c(-Inf, 3, 8, Inf),
                                  labels = c("left", "center", "right"),
                                  include.lowest = TRUE))


w7_w8_subgroups <- w7_w8_subgroups %>% 
  rename(wtp_env_prot = w7_q23x1,
         wtp_warranty = w8_q13x1) 

# recode Likert scale items on importance of repairability/recyclability

# Define labels for re-coded groups
labels_1 <- c("Not important", "important")

# create importance of repairability variable
w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(importance_repairability = cut(w8_q14x1, breaks = c(0, 3, 5),
                                        labels = labels_1, include.lowest = TRUE))

# create importance of recylcability variable
w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(importance_recyclability = cut(w8_q14x5, breaks = c(0, 3, 5),
                                        labels = labels_1, include.lowest = TRUE))

# create variable indicating support for mandatory circular labelling

# Create the new variable "prefs_mandatory_labelling" as the average of the four variables

# prefs for mandatory labelling of recycling

w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(prefs_mandatory_labelling_recy = rowMeans(select(., w8_q37), na.rm = TRUE))

# Define the labels for the groups
labels2 <- c("Disagree or Neutral", "Agree")

# Mutate column based on the new variable
w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(prefs_mandatory_labelling_recy = cut(prefs_mandatory_labelling_recy,
                                                  breaks = c(0, 3, 5),
                                                  labels = labels2, include.lowest = TRUE))

# prefs for mandatory labelling of repairability
w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(prefs_mandatory_labelling_rep = rowMeans(select(., w8_q37), na.rm = TRUE))


# Mutate column based on the new variable
w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(prefs_mandatory_labelling_rep = cut(prefs_mandatory_labelling_rep,
                                              breaks = c(0, 3, 5),
                                              labels = labels2, include.lowest = TRUE))

# prefs for mandatory labelling of durability

w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(prefs_mandatory_labelling_duration = rowMeans(select(., w8_q36), na.rm = TRUE))


# Mutate column based on the new variable
w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(prefs_mandatory_labelling_duration = cut(prefs_mandatory_labelling_duration,
                                             breaks = c(0, 3, 5),
                                             labels = labels2, include.lowest = TRUE))


# recode variable for descriptive WTP for env protection

# Define labels for re-coded groups
labels_3 <- c("Not willing", "Neutral", "Very willing")

# create importance of repairability variable
w7_w8_subgroups <- w7_w8_subgroups %>% 
  mutate(wtp_env_prot = cut(w8_q14x1, breaks = c(0, 2, 3, 5),
                                        labels = labels_3, include.lowest = TRUE))


# create variable for expectation of impact of circularity on Swiss economy

# Calculate average of variables
w7_w8_subgroups$avg_w8_q10 <- rowMeans(w7_w8_subgroups[, c("w8_q10x1", "w8_q10x2", "w8_q10x3", "w8_q10x4")], na.rm = TRUE)

# Define labels for re-coded groups
labels_4 <- c("disadvantagefor SE", "Neutral", "benefits for SE")

# Generate "swiss_economy" variable based on groups
w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(swiss_economy = cut(avg_w8_q10, breaks = c(0, 2, 3, 5),
                            labels = labels_4, include.lowest = TRUE))


# gender 

# Rename the variable "w8_q2" to "Gender"
w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(Gender = ifelse(w8_q2 == 1, "Female", 
                         ifelse(w8_q2 == 2, "Male", 
                                ifelse(w8_q2 == 3, NA, as.character(w8_q2)))))



# Print the first few rows of the dataset to verify the rename
head(w7_w8_subgroups)

# income 

# Recode the values with appropriate value names
w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(income = case_when(
    w7_q40x1 %in% c(2, 3) ~ "Low (=< 6'000 CHF)",
    w7_q40x1 %in% c(4, 5, 6, 7) ~ "Medium (6'001 - 14'000 CHF)",
    w7_q40x1 %in% c(8, 9, 10) ~ "High (>= 14'001 CHF)",
    TRUE ~ "NA")) %>% 
      mutate(as.character(income)) %>% 
    rename(Income = income)


# age


# Data collection was in 2022
current_year <- 2022

w7_w8_subgroups <- w7_w8_subgroups %>%
  rename(age = w8_q1) %>%
  mutate(age = current_year - age,  # Calculate age based on birth year
         Age = cut(age,
                        breaks = c(-Inf, 29, 49, 69, Inf),
                        labels = c("<= 29 years", "30-49 years", "50-69 years", ">= 70 years"),
                        include.lowest = TRUE))

# Print the unique values in the new AgeGroup variable
print(unique(w7_w8_subgroups$Age))



# create CE score 

# recode and standardize CE attributes

w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(Price_recoded = case_when(
    Price %in% c("180 / 250 / 470 CHF") ~ 1,
    Price %in% c("550 / 850 / 890 CHF") ~ 2,
    Price %in% c("920 / 1900 / 1650 CHF") ~ 3,
    Price %in% c("1300 / 3100 / 3200 CHF") ~ 4))

w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(env_impacts_recoded = case_when(
    `Env. production impact` %in% c("Very low") ~ 1,
    `Env. production impact` %in% c("Medium") ~ 2,
    `Env. production impact` %in% c("Very high") ~ 3))


w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(Functional_duration_recoded = case_when(
    `Functional duration` %in% c("2 / 2 / 5 years") ~ 1,
    `Functional duration` %in% c("4 / 8 / 10 years") ~ 2,
    `Functional duration` %in% c("8 / 15 / 15 years") ~ 3))

w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(Energy_efficiency_recoded = case_when(
    `Energy efficiency` == "G" ~ 1,
    `Energy efficiency` == "E" ~ 2,
    `Energy efficiency` == "C" ~ 3,
    `Energy efficiency` == "A" ~ 4))

w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(Recyclability_recoded = case_when(
    Recyclability == "Not recyclable" ~ 1,
    Recyclability == "Limited recyclable" ~ 2,
    Recyclability == "Recyclable" ~ 3))

w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(Repairability_recoded = case_when(
    Repairability == "Not repairable at all" ~ 1,
    Repairability == "Repairable to a limited degree" ~ 2,
    Repairability == "Repairable" ~ 3))


# recode between 0 and 1

w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(
    Functional_duration_recoded_scaled = (Functional_duration_recoded - min(Functional_duration_recoded)) / (max(Functional_duration_recoded) - min(Functional_duration_recoded)),
    Energy_efficiency_recoded_scaled = (Energy_efficiency_recoded - min(Energy_efficiency_recoded)) / (max(Energy_efficiency_recoded) - min(Energy_efficiency_recoded)),
    Recyclability_recoded_scaled = (Recyclability_recoded - min(Recyclability_recoded)) / (max(Recyclability_recoded) - min(Recyclability_recoded)),
    Repairability_recoded_scaled = (Repairability_recoded - min(Repairability_recoded)) / (max(Repairability_recoded) - min(Repairability_recoded))
  )


w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(
    ce_index = rowMeans(select(., ends_with("_recoded_scaled")), na.rm = TRUE)
  )

w7_w8_subgroups <- w7_w8_subgroups %>%
  mutate(
    ce_category = cut(ce_index, 
                      breaks = c(-Inf, 0.33, 0.66, Inf), 
                      labels = c("Low", "Medium", "High"),
                      include.lowest = TRUE))

w7_w8_subgroups <- w7_w8_subgroups %>%
  rename(Education = w7_q5) 
    
# Save subgroups dataframe 
write.csv(w7_w8_subgroups, file = "/Users/cbrugge/Desktop/CE Paper/w7_w8_subgroups.csv")
save(w7_w8_subgroups, file = "/Users/cbrugge/Desktop/CE Paper/w7_w8_subgroups.RData")

# Copy the dataset to avoid modifying the original
w7_w8_subgroups_stata <- w7_w8_subgroups

# Define a function to create valid Stata variable names
make_valid_stata_name <- function(name) {
  # Remove spaces and replace special characters with underscores
  gsub("[^A-Za-z0-9_]", "_", name)
}

# Apply the function to all column names
colnames(w7_w8_subgroups_stata) <- make_valid_stata_name(colnames(w7_w8_subgroups_stata))

# Rename specific variables
w7_w8_subgroups_stata <- w7_w8_subgroups_stata %>%
  rename(
    recy_scaled = Recyclability_recoded_scaled,
    dur_scaled = Functional_duration_recoded_scaled,
    eff_scaled = Energy_efficiency_recoded_scaled,
    prefs_label_dur = prefs_mandatory_labelling_duration,
    prefs_label_recy = prefs_mandatory_labelling_recy,
    prefs_label_rep = prefs_mandatory_labelling_rep,
    rep_scaled = Repairability_recoded_scaled
  )

# Save as Stata (.dta) file
write_dta(w7_w8_subgroups_stata, "/Users/cbrugge/Desktop/CE Paper/w7_w8_subgroups_stata.dta")